* Run the two IPUMS extract do-files, clearing memory before each one.
* NOTE(review): presumably these create "1940_full.dta" and "1920_full.dta", which are loaded further down in this file -- confirm against the extract do-files.
clear
* 1940 census extract
do "usa_00087.do"
clear
* 1920 census extract
do "usa_00086.do"

* Build the person-level 1920 sample with parental occupation measures.
use "1920_full.dta", clear

*Exclude people living in group quarters (gq values 3 and 4 are dropped)
keep if gq~=3 & gq~=4

* Highest value across the two parents for every characteristic listed in vars.
* max() ignores a missing argument, so the result is missing only when both parents are missing.
* The parent variables are built from the loop macro so that adding a name to vars works as expected.
local vars occscore
foreach x of local vars {
	gen max`x'parent = max(`x'_mom, `x'_pop)
}

* Occupation code of the parent holding the highest occupation score.
* The father is assigned last, so his code is used when both parents tie.
gen occmaxoccscore = occ1950_mom if occscore_mom==maxoccscoreparent
replace occmaxoccscore = occ1950_pop if occscore_pop==maxoccscoreparent

keep histid age sex maxoccscoreparent race stateicp countyicp occmaxoccscore occ1950_mom occ1950_pop occscore_pop

*Rename the variables to indicate the year they are from
* foreach is used instead of the obsolete -for- command.
foreach v of varlist histid age sex maxoccscoreparent race stateicp countyicp occmaxoccscore occ1950_mom occ1950_pop occscore_pop {
	rename `v' `v'1920
}

* Attach the Eckert county crosswalk. The crosswalk is keyed on icpsrst and icpsrcty,
* so the 1920 state and county identifiers are renamed to those keys first.
rename stateicp1920 icpsrst
rename countyicp1920 icpsrcty

* joinby forms every pairwise match within a state-county key; the match status is stored in merge1920crosswalk.
joinby icpsrst icpsrcty using "EGLP_1920.dta", unmatched(both) _merge(merge1920crosswalk)

* County identifier built from the 1990 NHGIS state and county codes
generate cty_fips = (nhgiscty_1990/10) + 100*nhgisst_1990

* Tags one row per combination of target county and source ICPSR county (computed before any rows are dropped)
egen tagctyfipsicpsr = tag(cty_fips icpsrst icpsrcty)

* Keep matched rows only:
*   status 2 = crosswalk rows with no census record
*   status 1 = IPUMS counties that do not merge to the crosswalk. None of these affect
*              normal school/asylum counties. See parent_occ1920.do for details.
drop if inlist(merge1920crosswalk, 1, 2)

* Restrict the 1920 sample to normal school and asylum counties and save it.
merge m:1 cty_fips using "justnormasylum.dta"
rename _merge mergenormasylum

*Keep normal school and asylum counties
keep if mergenormasylum==3

rename cty_fips cty_fips1920
drop year icpsrst icpsrcty nhgisst_1990 nhgiscty_1990 nhgisst nhgiscty statenam nhgisnam area_base statenam_1990 nhgisnam_1990 icpsrst_1990 icpsrcty_1990 area us_state

* Year suffix for the county-type variables. foreach replaces the obsolete -for- command.
foreach v of varlist hasnormalschool hasnormalorasylum mergenormasylum {
	rename `v' `v'1920
}

* State code = leading digit(s) of the county code: one digit for 4-character codes, two digits for 5-character codes.
tostring cty_fips1920, gen(cty_fipstr)
gen statefip1920 = substr(cty_fipstr, 1,1) if length(cty_fipstr)==4
replace statefip1920 = substr(cty_fipstr, 1, 2) if length(cty_fipstr)==5
destring statefip1920, replace

*Drop DE, Georgia, RI, SC, NV, WY, AZ, NM, AK, HI because they don't have at least one normal school and one asylum county
* The full variable name is used so the filter does not depend on variable-name abbreviation.
drop if inlist(statefip1920, 2, 4, 10, 13, 15, 32, 35, 44, 45, 56)

save "1920_small.dta", replace


* Build the 1940 outcome file: keep the variables used below and suffix them with the census year.
use "1940_full.dta", clear

keep histid age sex stateicp countyicp marst school higrade empstat occ1950 incwage occscore incwage_sp occ1950_sp

* rename the variables to indicate the year they are from
* foreach is used instead of the obsolete -for- command.
foreach v of varlist histid age sex stateicp countyicp marst school higrade empstat occ1950 incwage occscore incwage_sp occ1950_sp {
	rename `v' `v'1940
}

save "1940_small.dta", replace

*Merging the IPUMS censuses to the Census Tree crosswalks
* Convert the 1920-1940 link file to Stata format; it stays in memory as the master data.
import delimited using "1920_1940.csv", varnames(1) clear
save "1920_1940.dta", replace

* Attach the 1920 records; mergetree1920 holds the match status.
merge 1:m histid1920 using "1920_small.dta", generate(mergetree1920)

* merged1920 flags 1920 records that have a link; link rows without a 1920 record are discarded.
generate merged1920 = mergetree1920==3
drop if mergetree1920==1

* 0/1 indicators, left missing when the underlying 1920 variable is missing
generate male1920 = sex1920==1 if sex1920~=.
generate white1920 = race1920==1 if race1920~=.
generate black1920 = race1920==2 if race1920~=.

* Equals 1 when at least one parent's occupation is recorded
generate occparentsnonmiss = !(occ1950_pop1920==. & occ1950_mom1920==.)

*Only keep those that are living with at least one of their parents--to avoid getting people who traveled for school, asylum
drop if occparentsnonmiss~=1

* Check how many distinct 1920 people remain
egen taghistid1920 =tag(histid1920)
tabulate taghistid1920

* From here on only linked 1920 records are used
keep if mergetree1920==3

* Attach 1940 outcomes and keep only rows found in both files
merge m:1 histid1940 using "1940_small.dta", generate(mergetree1940)
keep if mergetree1940==3

* Education indicators from higrade1940; left missing when higrade1940 is missing or coded 99.
generate atlhs = higrade1940>=15 if higrade1940~=. & higrade1940~=99
generate atlsomecoll = higrade1940>=16 if higrade1940~=. & higrade1940~=99
generate atlcoll = higrade1940>=19 if higrade1940~=. & higrade1940~=99

* Wage income for the person and the spouse, with the codes 999999 and 999998 set to missing
foreach w in incwage1940 incwage_sp1940 {
	generate `w'U = `w' if `w'~=999999 & `w'~=999998
}

* Own occupation: codes 700-970 are classed as unskilled, codes 100 and 123 as farmer.
* Both are left missing when the occupation is missing or coded 999.
generate unsk = inrange(occ19501940, 700, 970) if occ19501940~=. & occ19501940~=999
generate farmer = inlist(occ19501940, 100, 123) if occ19501940~=. & occ19501940~=999

* Same classification for the spouse's occupation
generate unsk_sp = inrange(occ1950_sp1940, 700, 970) if occ1950_sp1940~=. & occ1950_sp1940~=999
generate farmer_sp = inlist(occ1950_sp1940, 100, 123) if occ1950_sp1940~=. & occ1950_sp1940~=999

* Own plus spouse wage income; the missing option leaves the total missing when both components are missing
egen hhincwage = rowtotal(incwage1940U incwage_sp1940U), missing

* Indicators for empstat1940 equal to 1 and marst1940 equal to 1, missing when the source is missing
generate emp = empstat1940==1 if empstat1940~=.
generate married = marst1940==1 if marst1940~=.

* 1940 occupation classes for the person (no suffix) and the spouse (_sp), plus household-level versions.

* Unskilled: codes 700-970, missing when the code is missing or 999
generate unsk1940 = inrange(occ19501940, 700, 970) if occ19501940~=. & occ19501940~=999
generate unsk_sp1940 = inrange(occ1950_sp1940, 700, 970) if occ1950_sp1940~=. & occ1950_sp1940~=999

* Farmer: codes 100 or 123. The household version is missing only when both occupations are missing.
generate farmer1940hh = (inlist(occ19501940, 100, 123) | inlist(occ1950_sp1940, 100, 123)) if !(occ19501940==. & occ1950_sp1940==.)
generate farmer1940 = inlist(occ19501940, 100, 123)

* White collar: codes 0-99 or 200-490
generate whitecollar1940 = inrange(occ19501940, 0, 99) | inrange(occ19501940, 200, 490)
generate whitecollar_sp1940 = inrange(occ1950_sp1940, 0, 99) | inrange(occ1950_sp1940, 200, 490)

* Craft/operative: codes 500-690
generate craftoper1940 = inrange(occ19501940, 500, 690)
generate craftoper_sp1940 = inrange(occ1950_sp1940, 500, 690)

* Non-occupational responses: codes 980-999
generate nonocc1940 = inrange(occ19501940, 980, 999)
generate nonocc_sp1940 = inrange(occ1950_sp1940, 980, 999)

* Household class = 1 when either the person or the spouse is in the class; missing when both occupations are missing
foreach cls in unsk craftoper whitecollar {
	egen `cls'hh = rowmax(`cls'1940 `cls'_sp1940)
	replace `cls'hh = . if occ19501940==. & occ1950_sp1940==.
}

* Household is non-occupational when the person is, and the spouse either is too or has no occupation recorded
generate nonocchh = nonocc1940==1 & (nonocc_sp1940==1 | occ1950_sp1940==.)
replace nonocchh = . if occ19501940==. & occ1950_sp1940==.

* 1920 parental occupation classes (dad = occ1950_pop1920, mom = occ1950_mom1920) and
* indicators for the child being in a class in 1940 that the parents were not in.
* Variables are referred to by their full names so nothing depends on variable-name abbreviation.

* Unskilled: codes 700-970
gen dadunsk1920 = inrange(occ1950_pop1920, 700, 970)
gen momunsk1920 = inrange(occ1950_mom1920, 700, 970)

* White collar: codes 0-99 or 200-490
gen dadwhitecollar1920 = inrange(occ1950_pop1920, 0, 99) | inrange(occ1950_pop1920, 200, 490)
gen momwhitecollar1920 = inrange(occ1950_mom1920, 0, 99) | inrange(occ1950_mom1920, 200, 490)

* Craft/operative: codes 500-690
gen dadcraftoper1920 = inrange(occ1950_pop1920, 500, 690)
gen momcraftoper1920 = inrange(occ1950_mom1920, 500, 690)

* Non-occupational responses: codes 980-999
gen dadnonocc1920 = inrange(occ1950_pop1920, 980, 999)
gen momnonocc1920 = inrange(occ1950_mom1920, 980, 999)

* Farmer: codes 100 or 123. The parent version is missing only when both parents' occupations are missing.
gen parent_farmer = inlist(occ1950_pop1920, 100, 123) | inlist(occ1950_mom1920, 100, 123)
replace parent_farmer = . if ((occ1950_pop1920==.) & (occ1950_mom1920==.))

gen dad_farmer = inlist(occ1950_pop1920, 100, 123)

* Parent class = 1 when either parent is in the class; missing when both parents' occupations are missing
local vars unsk craftoper whitecollar
foreach x of local vars {
	egen parent`x' = rowmax(dad`x'1920 mom`x'1920)
	replace parent`x' = . if occ1950_pop1920==. & occ1950_mom1920==.
}

* Household in 1940 is in a class that the parents were not in (a missing parent class counts as not in the class)
gen occdiffhh = (unskhh==1 & parentunsk~=1) | (craftoperhh==1 & parentcraftoper~=1) | (whitecollarhh==1 & parentwhitecollar~=1) | (farmer1940hh==1 & parent_farmer~=1)

* Same comparison between the person's own 1940 class and the father's 1920 class
gen occdiffdad = (unsk1940==1 & dadunsk1920~=1) | (craftoper1940==1 & dadcraftoper1920~=1) | (whitecollar1940==1 & dadwhitecollar1920~=1) | (farmer1940==1 & dad_farmer~=1)

* Parents are non-occupational when both are, or when one is and the other has no occupation recorded
gen parentnonocc = (dadnonocc1920==1 & momnonocc1920==1) | (dadnonocc1920==1 & occ1950_mom1920==.) | (momnonocc1920==1 & occ1950_pop1920==.)

* constant is summed later to count observations per cell
gen constant = 1

* Household wage income is strictly positive and not missing
gen poshhwage = hhincwage>0 & hhincwage~=.

* Run parent_occ1920.do on the data in memory; preserve/restore puts the data back
* exactly as it was before the call, so nothing that do-file changes in memory carries over.
preserve
do "parent_occ1920.do"
restore

* Indicator for the parental occupation score being at or below 20, defined only where white1920 is 0 or 1.
* NOTE(review): the same cutoff (20) is used in both lines; presumably this is the median referred to
* in the table comments below -- confirm whether race-specific cutoffs were intended.
gen bmedmaxoccscoreparent1920 = maxoccscoreparent1920<=20 if white1920==1
replace bmedmaxoccscoreparent1920 = maxoccscoreparent1920<=20 if white1920==0

* Person-level analysis file used by the table sections below
save "1920_1940_merged.dta", replace

********************
*Tables A10, A12: Occupations 1920-1940
*******************
* The seed is set once here; the permute calls further down the file are not re-seeded.
set seed 349132

* All variables below are referred to by their full names (statefip1920, hasnormalschool1920,
* cty_fips1920) so the section does not depend on variable-name abbreviation.
preserve

keep if age1920>=6

keep if (white1920==1|black1920==1)

tab age1920
tab age1940
count

* Multiply each person-level measure by weight so that the county sums below are weighted counts.
* NOTE(review): weight is not created in this file; presumably it is carried over from the county crosswalk -- confirm.
local vars occdiffhh unskhh craftoperhh whitecollarhh nonocchh farmer1940hh male1920 parentunsk parentcraftoper parentwhitecollar parentnonocc parent_farmer constant
foreach x of local vars {
	gen `x'wt = `x'*weight
	drop `x'
	rename `x'wt `x'
}

* One row per county and race; constant becomes the weighted number of people in the cell.
collapse (sum) occdiffhh unskhh craftoperhh whitecollarhh nonocchh farmer1940hh ///
	parentunsk parentcraftoper parentwhitecollar parentnonocc parent_farmer male1920 constant ///
	(mean) statefip1920 hasnormalschool1920, by(cty_fips1920 white1920)

su constant if white1920==1, d

tab cty_fips1920 hasnormalschool1920 if constant<=10 & white1920==1
tab statefip1920 hasnormalschool1920 if white1920==1

tab cty_fips1920 hasnormalschool1920 if constant<=10 & white1920==0
tab statefip1920 hasnormalschool1920 if white1920==0

* Within each state, county type and race: share of counties whose rounded cell size is below 10.
* A state-race group is dropped when that share is 50% or more for either county type.
gen constantrd = round(constant,1)
gen below10 = constantrd<10
count if below10==1
bysort statefip1920 hasnormalschool1920 white1920: egen totctytype = count(hasnormalschool1920)
bysort statefip1920 hasnormalschool1920 white1920: egen totbelow10 = total(below10)
gen fracbelow10 = totbelow10/totctytype
bysort statefip1920 white1920: egen maxfracbelow10 = max(fracbelow10)

tab statefip1920 white1920 if maxfracbelow10>=.5
drop if maxfracbelow10>=.5

* County shares: weighted sum divided by weighted cell size
local vars  parent_farmer parentunsk parentcraftoper parentwhitecollar parentnonocc unskhh craftoperhh whitecollarhh nonocchh farmer1940hh male1920
foreach x of local vars {
	gen `x'mean = `x'/constant
}

* The occupational-change share excludes non-occupational households from the denominator
local vars occdiffhh
foreach x of local vars {
	gen `x'mean = `x'/(constant-nonocchh)
}

sort cty_fips1920

local vars occdiffhh unskhh craftoperhh whitecollarhh nonocchh  farmer1940hh male1920 parentunsk parentcraftoper parentwhitecollar  parentnonocc parent_farmer
foreach x of local vars {

	* White sample: state fixed effects, standard errors clustered by state.
	* dvmean is the outcome mean among asylum counties in the estimation sample.
	reghdfe `x'mean hasnormalschool1920 if white1920==1, absorb(statefip1920) cluster(statefip1920)
	su `x'mean if e(sample)==1 & hasnormalschool1920==0
	local mean = r(mean)
	su constant if e(sample)==1, d
	tab statefip1920 if e(sample)==1
	outreg2 using censustree19201940.xls, append excel dec(3) addstat(dvmean, `mean') adec(3)

	* Non-white sample: state fixed effects, robust standard errors
	reghdfe `x'mean hasnormalschool1920 if white1920==0, absorb(statefip1920) vce(robust)
	su `x'mean if e(sample)==1 & hasnormalschool1920==0
	local mean = r(mean)
	su constant if e(sample)==1, d
	tab statefip1920 if e(sample)==1
	outreg2 using censustree19201940bl.xls, append excel dec(3) addstat(dvmean, `mean') adec(3)

	* Permutation test for the non-white sample: county type is shuffled within state
	permute hasnormalschool1920 _b[hasnormalschool1920], strata(statefip1920) reps(1000): reg `x'mean hasnormalschool1920 i.statefip1920 if white1920==0
}

restore


*****
*Tables A11, A13: Male Occupations 1920-1940

* All variables below are referred to by their full names (statefip1920, hasnormalschool1920,
* cty_fips1920) so the section does not depend on variable-name abbreviation.
preserve

keep if age1920>=6
keep if male1920==1
*Drop if dad's location not found (the father's occupation is missing)
keep if occ1950_pop1920~=.

keep if (white1920==1|black1920==1)

tab age1920
tab age1940
count

* Multiply each person-level measure by weight so that the county sums below are weighted counts.
* NOTE(review): weight is not created in this file; presumably it is carried over from the county crosswalk -- confirm.
local vars constant occdiffdad unsk1940 craftoper1940 whitecollar1940 farmer1940 nonocc1940 dadunsk1920 dadcraftoper1920 dadwhitecollar1920 dad_farmer dadnonocc1920
foreach x of local vars {
	gen `x'wt = `x'*weight
	drop `x'
	rename `x'wt `x'
}

* One row per county and race; constant becomes the weighted number of people in the cell.
collapse (sum) occdiffdad unsk1940 craftoper1940 whitecollar1940 farmer1940 nonocc1940 ///
	dadunsk1920 dadcraftoper1920 dadwhitecollar1920 dad_farmer dadnonocc1920 constant ///
	(mean) statefip1920 hasnormalschool1920, by(cty_fips1920 white1920)

su constant if white1920==1, d

tab cty_fips1920 hasnormalschool1920 if constant<=10 & white1920==1
tab statefip1920 hasnormalschool1920 if white1920==1

* Share of counties, by state, county type and race, whose rounded cell size is below 10
gen constantrd = round(constant,1)
gen below10 = constantrd<10
bysort statefip1920 hasnormalschool1920 white1920: egen totctytype = count(hasnormalschool1920)
bysort statefip1920 hasnormalschool1920 white1920: egen totbelow10 = total(below10)
gen fracbelow10 = totbelow10/totctytype
bysort statefip1920 white1920: egen maxfracbelow10 = max(fracbelow10)

*Drop states for which 50% or more of their normal or asylum counties have sample size less than 10

tab statefip1920 if maxfracbelow10>=.5
drop if maxfracbelow10>=.5

* County shares: weighted sum divided by weighted cell size
local vars   unsk1940 craftoper1940 whitecollar1940 farmer1940 nonocc1940 dadunsk1920 dadcraftoper1920 dadwhitecollar1920 dad_farmer dadnonocc1920
foreach x of local vars {
	gen `x'mean = `x'/constant
}

* The occupational-change share excludes non-occupational sons from the denominator
local vars occdiffdad
foreach x of local vars {
	gen `x'mean = `x'/(constant-nonocc1940)
}

sort cty_fips1920
local vars occdiffdad unsk1940 craftoper1940 whitecollar1940 farmer1940 nonocc1940 dadunsk1920 dadcraftoper1920 dadwhitecollar1920 dad_farmer dadnonocc1920
foreach x of local vars {

	* White sample: state fixed effects, standard errors clustered by state.
	* dvmean is the outcome mean among asylum counties in the estimation sample.
	reghdfe `x'mean hasnormalschool1920 if white1920==1, absorb(statefip1920) cluster(statefip1920)
	su `x'mean if e(sample)==1 & hasnormalschool1920==0
	local mean = r(mean)
	su constant if e(sample)==1, d
	tab statefip1920 if e(sample)==1
	outreg2 using censustree19201940.xls, append excel dec(3) addstat(dvmean, `mean') adec(3)

	* Non-white sample: state fixed effects, robust standard errors
	reghdfe `x'mean hasnormalschool1920 if white1920==0, absorb(statefip1920) vce(robust)
	su `x'mean if e(sample)==1 & hasnormalschool1920==0
	local mean = r(mean)
	su constant if e(sample)==1, d
	tab statefip1920 if e(sample)==1
	outreg2 using censustree19201940bl.xls, append excel dec(3) addstat(dvmean, `mean') adec(3)

	* Permutation test for the non-white sample: county type is shuffled within state
	permute hasnormalschool1920 _b[hasnormalschool1920], strata(statefip1920) reps(1000): reg `x'mean hasnormalschool1920 i.statefip1920 if white1920==0
}

restore

*Tables 2, A15, A16: Effects by parental occupation score

* The analysis is run twice: i = 0 keeps children with bmedmaxoccscoreparent1920 equal to 0,
* i = 1 keeps those with it equal to 1. Variables are referred to by their full names
* (statefip1920, hasnormalschool1920, cty_fips1920) throughout.
forvalues i = 0(1)1 {
	preserve
	keep if (white1920==1|black1920==1)

	keep if bmedmaxoccscoreparent1920==`i'

	* Multiply each person-level measure by weight so that the county sums below are weighted.
	* NOTE(review): weight is not created in this file; presumably it is carried over from the county crosswalk -- confirm.
	local vars constant atlhs atlsomecoll atlcoll hhincwage emp married poshhwage
	foreach x of local vars {
		gen `x'wt = `x'*weight
		drop `x'
		rename `x'wt `x'
	}

	* One row per county, race and sex
	collapse (sum) constant atlhs atlsomecoll atlcoll hhincwage emp married poshhwage ///
		(mean) statefip1920 hasnormalschool1920, by(cty_fips1920 white1920 male1920)

	* Drop state-race-sex groups in which, for either county type, 50% or more of the
	* counties have a rounded cell size below 10
	gen constantrd = round(constant,1)
	gen below10 = constantrd<10
	bysort statefip1920 hasnormalschool1920 white1920 male1920: egen totctytype = count(hasnormalschool1920)
	bysort statefip1920 hasnormalschool1920 white1920 male1920: egen totbelow10 = total(below10)
	gen fracbelow10 = totbelow10/totctytype
	bysort statefip1920 white1920 male1920: egen maxfracbelow10 = max(fracbelow10)

	tab statefip1920 if maxfracbelow10>=.5
	drop if maxfracbelow10>=.5

	* County shares: weighted sum divided by weighted cell size
	local vars atlhs atlsomecoll atlcoll emp married
	foreach x of local vars {
		gen `x'mean = `x'/constant
	}

	* Mean household wage income among those with positive income, and its log
	gen hhincwagemean = hhincwage/poshhwage
	gen lnhhincwagemean = ln(hhincwagemean)

	sort cty_fips1920
	local vars atlhs atlsomecoll atlcoll married lnhhincwage emp

	foreach x of local vars {

		* White men: state fixed effects, standard errors clustered by state.
		* dvmean is the outcome mean among asylum counties in the estimation sample.
		reghdfe `x'mean hasnormalschool1920 if white1920==1 & male1920==1, absorb(statefip1920) cluster(statefip1920)
		su `x'mean if e(sample)==1 & hasnormalschool1920==0
		local mean = r(mean)
		su constant if e(sample)==1
		outreg2 using censustree19201940.xls, append excel dec(3) addstat(dvmean, `mean') adec(3)

		* White women
		reghdfe `x'mean hasnormalschool1920 if white1920==1 & male1920==0, absorb(statefip1920) cluster(statefip1920)
		su `x'mean if e(sample)==1 & hasnormalschool1920==0
		local mean = r(mean)
		su constant if e(sample)==1
		outreg2 using censustree19201940.xls, append excel dec(3) addstat(dvmean, `mean') adec(3)

		* Non-white men: robust standard errors, followed by a permutation test within state
		reghdfe `x'mean hasnormalschool1920 if white1920==0 & male1920==1, absorb(statefip1920) vce(robust)
		su `x'mean if e(sample)==1 & hasnormalschool1920==0
		local mean = r(mean)
		su constant if e(sample)==1
		outreg2 using censustree19201940bl.xls, append excel dec(3) addstat(dvmean, `mean') adec(3)

		permute hasnormalschool1920 _b[hasnormalschool1920], strata(statefip1920) reps(1000): reg `x'mean hasnormalschool1920 i.statefip1920 if white1920==0 & male1920==1

		* Non-white women: robust standard errors, followed by a permutation test within state
		reghdfe `x'mean hasnormalschool1920 if white1920==0 & male1920==0, absorb(statefip1920) vce(robust)
		su `x'mean if e(sample)==1 & hasnormalschool1920==0
		local mean = r(mean)
		su constant if e(sample)==1
		outreg2 using censustree19201940bl.xls, append excel dec(3) addstat(dvmean, `mean') adec(3)

		permute hasnormalschool1920 _b[hasnormalschool1920], strata(statefip1920) reps(1000): reg `x'mean hasnormalschool1920 i.statefip1920 if white1920==0 & male1920==0
	}
	restore
}

***************************
*Table A17: Effects for children whose parents' occupation score is below median, but exclude children of farmers

* Variables are referred to by their full names (occmaxoccscore1920, statefip1920,
* hasnormalschool1920) so the section does not depend on variable-name abbreviation.
preserve
keep if bmedmaxoccscoreparent1920==1
* Exclude children whose highest-scoring parent has occupation code 100 or 123 (the farmer codes used above)
drop if occmaxoccscore1920==100|occmaxoccscore1920==123

keep if (white1920==1|black1920==1)

* Multiply each person-level measure by weight so that the county sums below are weighted.
* NOTE(review): weight is not created in this file; presumably it is carried over from the county crosswalk -- confirm.
local vars constant atlhs atlsomecoll atlcoll hhincwage emp married poshhwage
foreach x of local vars {
	gen `x'wt = `x'*weight
	drop `x'
	rename `x'wt `x'
}

* One row per county, race and sex
collapse (sum) constant atlhs atlsomecoll atlcoll hhincwage emp married poshhwage ///
	(mean) statefip1920 hasnormalschool1920, by(cty_fips1920 white1920 male1920)

* Drop state-race-sex groups in which, for either county type, 50% or more of the
* counties have a rounded cell size below 10
gen constantrd = round(constant,1)
gen below10 = constantrd<10
bysort statefip1920 hasnormalschool1920 white1920 male1920: egen totctytype = count(hasnormalschool1920)
bysort statefip1920 hasnormalschool1920 white1920 male1920: egen totbelow10 = total(below10)
gen fracbelow10 = totbelow10/totctytype
bysort statefip1920 white1920 male1920: egen maxfracbelow10 = max(fracbelow10)

tab statefip1920 if maxfracbelow10>=.5
drop if maxfracbelow10>=.5

* County shares: weighted sum divided by weighted cell size
local vars atlhs atlsomecoll atlcoll emp married
foreach x of local vars {
	gen `x'mean = `x'/constant
}

* Mean household wage income among those with positive income, and its log
gen hhincwagemean = hhincwage/poshhwage
gen lnhhincwagemean = ln(hhincwagemean)

local vars atlhs atlsomecoll atlcoll married lnhhincwage emp

foreach x of local vars {

	* White men: state fixed effects, standard errors clustered by state.
	* dvmean is the outcome mean among asylum counties in the estimation sample.
	reghdfe `x'mean hasnormalschool1920 if white1920==1 & male1920==1, absorb(statefip1920) cluster(statefip1920)
	su `x'mean if e(sample)==1 & hasnormalschool1920==0
	local mean = r(mean)
	su constant if e(sample)==1
	outreg2 using censustree19201940.xls, append excel dec(3) addstat(dvmean, `mean') adec(3)

	* White women
	reghdfe `x'mean hasnormalschool1920 if white1920==1 & male1920==0, absorb(statefip1920) cluster(statefip1920)
	su `x'mean if e(sample)==1 & hasnormalschool1920==0
	local mean = r(mean)
	su constant if e(sample)==1
	outreg2 using censustree19201940.xls, append excel dec(3) addstat(dvmean, `mean') adec(3)
}
restore

**************
*Appendix Text Section B3, first five paragraphs: Description of Census Tree Merge

* Rebuild the link-to-1920 merge, this time keeping 1920 records without a link
* so that the share of records that merge can be computed.
use "1920_1940.dta", clear

merge 1:m histid1920 using "1920_small.dta"
rename _merge mergetree1920

* merged1920 = 1 for 1920 records with a link; link rows without a 1920 record are discarded
gen merged1920 = mergetree1920==3
drop if mergetree1920==1

gen male1920 = sex1920==1
replace male1920 = . if sex1920==.

gen white1920 = race1920==1
replace white1920 = . if race1920==.

gen cty_fips = cty_fips1920

gen occparentsnonmiss = (occ1950_pop1920~=.|occ1950_mom1920~=.)

*Only keep those that are living with at least one of their parents--to avoid getting people who traveled for school, asylum

keep if occparentsnonmiss==1

* County-level merge rates are computed on a copy; the person-level data is restored afterwards.
preserve

gen constant = 1

egen taghistid1920 = tag(histid1920)

*Number merging by sex
tab mergetree1920 sex1920 if white1920==1 & taghistid1920==1, missing

* Weight only the two variables that are summed below.
* NOTE(review): weight is not created in this file; presumably it is carried over from the county crosswalk -- confirm.
local vars constant merged1920
foreach x of local vars {
	gen `x'wt = `x'*weight
	drop `x'
	rename `x'wt `x'
}

collapse (sum) constant merged1920 (mean) hasnormalschool1920 statefip1920, by(cty_fips white1920 male1920)

* Weighted share of the cell that merges to the Census Tree
gen fracmerged1920 = merged1920/constant

*Test for differential merging likelihood between normal school and asylum counties
* Fixed effects and clustering both use statefip1920.
reghdfe fracmerged1920 hasnormalschool1920 if white1920==1 & male1920==1, absorb(statefip1920) cluster(statefip1920)
su fracmerged1920 if e(sample)==1 & hasnormalschool1920==0
local mean = r(mean)
outreg2 using censustree19201940.xls, append excel dec(3) addstat(dvmean, `mean') adec(3)

reghdfe fracmerged1920 hasnormalschool1920 if white1920==1 & male1920==0, absorb(statefip1920) cluster(statefip1920)
su fracmerged1920 if e(sample)==1 & hasnormalschool1920==0
local mean = r(mean)
outreg2 using censustree19201940.xls, append excel dec(3) addstat(dvmean, `mean') adec(3)

restore

*Correlates of merging at the individual level.
* Regress the merge indicator on the parental occupation score with county fixed effects,
* standard errors clustered by county; white men first (s = 1), then white women (s = 0).
* NOTE(review): unlike the tabulations in this section, these regressions are not restricted
* to one row per histid1920 -- confirm that is intended.
foreach s in 1 0 {
	reghdfe merged1920 maxoccscoreparent1920 if  white1920==1 & male1920==`s', absorb(cty_fips1920) cluster(cty_fips1920)
	outreg2 using censustree19201940.xls, append excel dec(3)
}

*Footnote 55: Number not merging to 1940 census
* Done on a copy of the data, which is restored at the end.
preserve

*Keep only the records that merge to the Census Tree and keep only the people living with parents
keep if mergetree1920==3

*Merge to 1940 record; 1940 records with no linked 1920 person are discarded
merge m:1 histid1940 using "1940_small.dta", generate(mergetree1940)
drop if mergetree1940==2

* Tabulate the 1940 match status by sex, counting each white 1920 person once
egen taghistid1920 = tag(histid1920)
tabulate mergetree1940 sex1920 if white1920==1 & taghistid1920==1, missing

restore


